# -*- coding: utf-8 -*-
import re

import scrapy


class JobboleSpider(scrapy.Spider):
    """Spider that scrapes one article page from blog.jobbole.com.

    The ``parse`` callback extracts the article's title, date, tags, HTML
    body and the praise / bookmark / comment counters, and yields them as a
    plain dict item.
    """

    name = 'jobbole'
    allowed_domains = ['blog.jobbole.com']
    start_urls = ['http://blog.jobbole.com/114276/']

    @staticmethod
    def _first_int(text):
        """Return the first run of digits in *text* as an int, or 0 if none.

        The counters on the page are rendered either with a number
        (e.g. ' 3 收藏') or without one (e.g. ' 收藏'); the latter, as well
        as empty or missing text, is treated as a count of zero.
        """
        found = re.search(r'\d+', text or '')
        return int(found.group()) if found else 0

    def parse(self, response):
        """Extract the article fields from *response* and yield them.

        Selectors that match nothing fall back to an empty string (text
        fields) or 0 (counters) instead of raising, so a page with a
        missing element still produces an item.

        Yields:
            dict: keys ``title``, ``date``, ``tags``, ``content``,
            ``praise_nums``, ``collect_nums`` and ``comment_nums``.
        """
        title = response.css('.entry-header h1::text').extract_first(default='').strip()

        # The first text node of the meta paragraph holds the date followed
        # by a '·' separator; drop the separator and surrounding whitespace.
        date = (
            response.css('.entry-meta p::text')
            .extract_first(default='')
            .replace('·', '')
            .strip()
        )

        # The meta links mix real tags with an "N 评论" (comments) link;
        # keep only the tags.
        tag_list = response.css('.entry-meta-hide-on-mobile a::text').extract()
        tags = ','.join(
            element for element in tag_list
            if not element.strip().endswith('评论')
        )

        # HTML of the first element matching '.entry' (empty if absent).
        content = response.css('.entry').extract_first(default='')

        praise_nums = self._first_int(
            response.css('.post-adds h10::text').extract_first(default='')
        )
        # Bookmark count: text is ' 3 收藏' or just ' 收藏' (no number -> 0).
        collect_nums = self._first_int(
            response.css('.bookmark-btn::text').extract_first(default='')
        )
        # Comment count follows the same rule as the bookmark count.
        comment_nums = self._first_int(
            response.css('a[href="#article-comment"] span::text').extract_first(default='')
        )

        yield {
            'title': title,
            'date': date,
            'tags': tags,
            'content': content,
            'praise_nums': praise_nums,
            'collect_nums': collect_nums,
            'comment_nums': comment_nums,
        }
